/*==============================================================================
US - 	Labor variables (1970) 
	Initial Conditions (1970): age*, edatt*, migration, hh_size, hh_own 

Outline:
This do file brings in state level data on
I.  	EMP, LF, UNEMP for 1970 
(adds a data point for states that were only part of composite regions)
II. 	Age structure
III. 	Educational attainment
IV. 	Household size, household ownership
V. 		Migration

Source of data used in this dofile: 
Age structure, Education
	U.S. Census, 1970 1% state fm1, downloaded from IPUMS-USA

migration
	U.S. Census, 1970 1% state fm2, downloaded from IPUMS-USA
==============================================================================*/

* Start from an empty session and switch off -more- pauses so the script runs unattended.
clear all
set more off

* Work inside the folder that holds the IPUMS extracts (the scratch global is set outside this file).
cd "$scratch/IPUMS USA Data/census_1970"

* Decompress both raw extract files through the operating-system shell.
! uncompress usa_00067.dat.Z //Form 1 microsample 
! uncompress usa_00069.dat.Z //Form 2 microsample, for migration

* Run the companion do-file of the first extract (presumably it reads usa_00067.dat into memory -- confirm).
quietly do usa_00067.do 

* Park the data now in memory in a temporary file that the migration section reloads later.
* NOTE(review): the comments above label usa_00067 as Form 1, yet its data are saved under a
*   name ending in fm2, and the Form 2 extract loaded below feeds the labor/age/education
*   sections -- confirm which extract is which.
* NOTE(review): the tempfile macro is referenced with a .dta suffix inside the macro quotes;
*   the documented form is the bare macro name -- confirm this expands as intended.
tempfile us_migration_fm2
save `us_migration_fm2.dta'
clear

* Load the second extract; the sections that follow operate on this data set.
quietly do usa_00069.do

*I.  	Labor indicators for 1970: LF, UNEMP, EMP, EMP_female, POP
*		Each indicator equals 1 for a qualifying person and is missing otherwise,
*		so the weighted (sum) collapses further down turn them into head counts.
note: Employment variables restricted to those not in Group Quarters to be comparable with CPS data

*		Every indicator below skips records with gq equal to 4.
*		NOTE(review): only code 4 is excluded -- check the IPUMS GQ codebook to confirm
*		that no other group-quarters code should be left out as well.
local not_gq4 "gq!=4"

*		Labor Force: empstat 1 or 2
gen LF_CENS1970_US 		= 1 if inrange(empstat, 1, 2) & `not_gq4'
*		No. Unemployed: empstat 2
gen UNEMP_CENS1970_US 	= 1 if empstat==2 & `not_gq4'
*		No. Employed: empstat 1, and the subset with sex equal to 2
gen EMP_CENS1970_US 	= 1 if empstat==1 & `not_gq4'
gen EMP_female			= 1 if empstat==1 & sex==2 & `not_gq4'

*		Population base (all ages), same gq restriction
gen POP_CENS1970_US 	= 1 if `not_gq4'

*II. 	Age structure
* Five-year age-band indicators age_0_4 ... age_65_69 plus an open-ended 70+ band.
* Each indicator is 1 inside the band and missing otherwise.

forvalues bottom = 0(5)69 {
	local top = `bottom'+4
	quietly gen age_`bottom'_`top' = 1 if inrange(age, `bottom', `top')
}
* Stata ranks missing values above every number, so age>=70 on its own would also
* flag records with a missing age; the explicit guard keeps them out of the 70+ band.
quietly gen age_70_plus = 1 if age>=70 & !missing(age)
* Row total across all age_ indicators (missing counts as zero); rowtotal() is the
* documented name of the legacy rsum() function.
egen age_total = rowtotal(age_*)

*III. Educational attainment

* One indicator per schooling category, edatt_yrs_0 ... edatt_yrs_19, created empty.
forvalues yrs = 0/19 {
	gen edatt_yrs_`yrs' = .
}

* Parallel lists: the i-th educd code switches on the indicator with the i-th suffix.
* Two codes feed suffix 0 (2 and 11) and two feed suffix 14 (65 and 70).
local educd_codes  2 11 12 14 15 16 17 22 23 25 26 30 40 50 60 65 70 80 90 100 110 111
local edatt_suffix 0  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 14 15 16  17  18  19

* Only persons aged 15+ with gradeatt equal to 0 are classified
* (presumably gradeatt 0 means not currently attending -- confirm in the codebook).
local n_codes : word count `educd_codes'
forvalues i = 1/`n_codes' {
	local code : word `i' of `educd_codes'
	local yrs  : word `i' of `edatt_suffix'
	replace edatt_yrs_`yrs' = 1 if educd==`code' & age>=15 & gradeatt==0
}

* Row total over every edatt indicator (missing counts as zero).
egen edatt_total = rsum(edatt*) 

* ==============================================================================
*IV. 	Household size, household ownership
* ==============================================================================

* Number the persons inside each household (serial); household-level values are
* filled in only on the record numbered 1 so that each household enters once.
bysort serial: gen n = _n
local one_per_hh "gq==1 & n==1"

* Household size taken from numprec; ownership flags from ownershp codes 1 and 2.
gen hh_size     = numprec if `one_per_hh'
gen home_own    = 1 if ownershp==1 & `one_per_hh'
gen home_no_own = 1 if ownershp==2 & `one_per_hh'


********************************************************************************
* Assign every state (statefip) its WVS region code. Codes run from 840001 to
* 840010; the region names are attached when the nuts label is built near the
* end of this file.

gen region = .
replace region = 840001 if inlist(statefip, 9, 23, 25, 33, 44, 50)
replace region = 840002 if inlist(statefip, 34, 36, 42)
replace region = 840003 if inlist(statefip, 10, 11, 12, 13, 14, 24, 37, 45, 51, 54)
replace region = 840004 if inlist(statefip, 1, 21, 28, 47)
replace region = 840005 if inlist(statefip, 5, 22, 40, 48)
replace region = 840006 if inlist(statefip, 17, 18, 26, 39, 55)
replace region = 840007 if inlist(statefip, 19, 20, 27, 29, 31, 38, 46)
replace region = 840008 if inlist(statefip, 4, 8, 16, 30, 32, 35, 49, 56)
replace region = 840009 if inlist(statefip, 2, 15, 41, 53)
replace region = 840010 if statefip == 6

* The migration origin variable is not needed in this data set.
drop migplac5

* Person-level file that the state and region collapses start from.
tempfile precollapse_fm1
save `precollapse_fm1.dta'

*V. 	Migration
*Migration data is for ages 5+ only in Form 2
clear
use `us_migration_fm2.dta'

*The variables built here describe moves of people who were already in the US five years earlier.

* Build two WVS region codes with one mapping:
*   region              from statefip (state of current residence)
*   migplac5_wvs_region from migplac5 (state of residence five years earlier)
foreach spec in "region statefip" "migplac5_wvs_region migplac5" {
	local target : word 1 of `spec'
	local source : word 2 of `spec'

	gen `target' = .
	replace `target' = 840001 if inlist(`source', 9, 23, 25, 33, 44, 50)
	replace `target' = 840002 if inlist(`source', 34, 36, 42)
	replace `target' = 840003 if inlist(`source', 10, 11, 12, 13, 14, 24, 37, 45, 51, 54)
	replace `target' = 840004 if inlist(`source', 1, 21, 28, 47)
	replace `target' = 840005 if inlist(`source', 5, 22, 40, 48)
	replace `target' = 840006 if inlist(`source', 17, 18, 26, 39, 55)
	replace `target' = 840007 if inlist(`source', 19, 20, 27, 29, 31, 38, 46)
	replace `target' = 840008 if inlist(`source', 4, 8, 16, 30, 32, 35, 49, 56)
	replace `target' = 840009 if inlist(`source', 2, 15, 41, 53)
	replace `target' = 840010 if `source' == 6
}

* 1 if the person lived in a different state five years earlier, with the origin
* code restricted to 1-56; missing otherwise.
gen migration = 1 if statefip!=migplac5 & migplac5>0 & migplac5<=56

* Person-level file that the in- and out-migration collapses start from.
tempfile precollapse_fm2
save `precollapse_fm2.dta'

* Collapse by state and Census region used in WVS
* Labor, age, schooling and home-ownership indicators are summed with person
* weights (perwt); household size is averaged with the same weights.

tempfile US_states_temp1 Census_regions_temp1
local sum_vars "LF_CENS-POP_CENS age_* edatt* home*"

*US states: one row per statefip
use `precollapse_fm1.dta', clear
collapse (sum) `sum_vars' (mean) hh_size [fw=perwt], by(statefip)
sort statefip
save `US_states_temp1.dta'

*Census regions: one row per WVS region code
use `precollapse_fm1.dta', clear
collapse (sum) `sum_vars' (mean) hh_size [fw=perwt], by(region)
sort region
save `Census_regions_temp1.dta'

* In- and out-migration counts, weighted by perwt.
* Only interstate movers are kept: the current state differs from the state of
* residence five years earlier, and the origin code lies in 1-56.
* NOTE(review): the regional totals add up interstate moves, so a move between two
*   states of the same region is counted as well -- confirm this is intended.
local interstate_mover "statefip!=migplac5 & migplac5>0 & migplac5<=56"

*inmigration
*US states: movers counted in the state they live in now
use `precollapse_fm2.dta', clear
keep if `interstate_mover'
collapse (sum) migration [fw=perwt], by(statefip)
rename migration inmigration
sort statefip
tempfile US_states_inmigration
save `US_states_inmigration.dta'

*Census regions: movers counted in the region they live in now
* (a single weighted collapse gives the same totals as collapsing by state first)
use `precollapse_fm2.dta', clear
keep if `interstate_mover'
collapse (sum) migration [fw=perwt], by(region)
rename migration inmigration
sort region
tempfile Census_regions_inmigration
save `Census_regions_inmigration.dta'

*outmigration
*US states: movers counted in the state they left
use `precollapse_fm2.dta', clear
keep if `interstate_mover'
collapse (sum) migration [fw=perwt], by(migplac5)
rename migration outmigration
rename migplac5 statefip
sort statefip
tempfile US_states_outmigration
save `US_states_outmigration.dta'

*Census regions: movers counted in the region they left
use `precollapse_fm2.dta', clear
keep if `interstate_mover'
collapse (sum) migration [fw=perwt], by(migplac5_wvs_region)
rename migration outmigration
* Spell out the full variable name: after the collapse no variable called migplac5
* exists, so the short form only worked through variable-name abbreviation.
rename migplac5_wvs_region region
sort region
tempfile Census_regions_outmigration
save `Census_regions_outmigration.dta'

*merge/append above initial conditions
* For both the region file and the state file, out-migration is merged first and
* in-migration second; the merge indicators get distinct names and are dropped below.
local directions out in

* Region rows
use `Census_regions_temp1.dta', clear
foreach direction of local directions {
	merge 1:1 region using `Census_regions_`direction'migration.dta', gen(_merge_regions_`direction')
}

tempfile Census_regions_all
save `Census_regions_all.dta'

* State rows, with the region rows stacked underneath
use `US_states_temp1.dta', clear
foreach direction of local directions {
	merge 1:1 statefip using `US_states_`direction'migration.dta', gen(_merge_states_`direction')
}

append using `Census_regions_all.dta'

drop _merge*

* Length in years of the window the migration counts refer to
gen migration_yrs=5

*Report data in thousands (population and labor counts only)
foreach var of varlist POP* LF* EMP* UNEMP* {
	replace `var' = `var' / 1000
}

*===============================================================================
* Create observation for US at country level, gen U.S. NUTS equivalent = States 
*===============================================================================
* The national row (region = 0) is built from the state rows only, so the region
* rows stacked in the same file are not counted twice.
tempfile US_append US_merge
save `US_append.dta'

* National counts: sum of the state rows
drop if statefip==.
collapse (sum) POP LF EMP* UNEMP age_* edatt* home* 
gen region = 0
save `US_merge.dta'

* National household size: mean of state values, weighted by state population
use `US_append.dta', clear
drop if statefip==.
collapse (mean) hh_size [aw=POP] //weight household size by POP
gen region = 0

* Join the two national pieces, then put the state and region rows back underneath.
* NOTE(review): the _merge variable created here is never dropped and ends up in the
*   saved output -- confirm that downstream code expects it.
merge 1:1 region using `US_merge.dta'
append using `US_append.dta'

* Build the text identifier nuts: start from the numeric code (statefip on state
* rows, region on all other rows), turn it into a string, then swap each code for
* its label.
gen nuts = cond(statefip==., region, statefip)
tostring nuts, replace force

* State rows: pairs of numeric code and two-letter abbreviation.
local state_labels ///
	 2 AK   1 AL   5 AR  60 AS   4 AZ   6 CA   8 CO   9 CT  11 DC  10 DE ///
	12 FL  13 GA  66 GU  15 HI  19 IA  16 ID  17 IL  18 IN  20 KS  21 KY ///
	22 LA  25 MA  24 MD  23 ME  26 MI  27 MN  29 MO  28 MS  30 MT  37 NC ///
	38 ND  31 NE  33 NH  34 NJ  35 NM  32 NV  36 NY  39 OH  40 OK  41 OR ///
	42 PA  72 PR  44 RI  45 SC  46 SD  47 TN  48 TX  49 UT  51 VA  78 VI ///
	50 VT  53 WA  55 WI  54 WV  56 WY

local n_tokens : word count `state_labels'
forvalues i = 1(2)`n_tokens' {
	local j = `i' + 1
	local fips : word `i' of `state_labels'
	local abbr : word `j' of `state_labels'
	replace nuts = "US: `abbr'" if nuts=="`fips'"
}

* National row
replace nuts = "Entire U.S." if region==0

* Region rows: names listed in the order of region codes 840001 ... 840010.
local r = 0
foreach name in "US: New England" "US: Middle Atlantic States" "US: South Atlantic" ///
		"US: East South Central" "US: West South Central" "US: East North Central" ///
		"US: West North Central" "US: Rocky Mountain state" "US: Northwest" "US: California" {
	local ++r
	replace nuts = "`name'" if region == 840000 + `r'
}

********************************************************************************
* Final touches and output files (both are written to the dta_files folder).

sort nuts
gen year=1970

* Female share of employment, in percent
gen EMP_share_female = EMP_female / EMP_CENS * 100

* Labor file: year, the four head-count variables and the nuts label.
* preserve/restore brings the full data set back afterwards.
preserve
keep year POP_CENS LF_CENS EMP_CENS UNEMP_CENS nuts
save "$dta_files/US_CENS1970_labor", replace
restore

* Initial-conditions file: everything except the head counts written to the labor file.
drop LF EMP_CENS UNEMP* POP*
save "$dta_files/IC_US_CENS1970", replace

* Re-compress the raw extract files.
! compress usa_00067.dat
! compress usa_00069.dat
